# Aim of script: generate Figure 7 of the eLife paper
# Prepared by : Charles Agoti
# Last updated : 29 June 2022

# Remove all (non-hidden) objects from the current environment so the script
# starts without leftovers from earlier work in the session.
# NOTE(review): this does not detach packages or reset options, so it is not
# equivalent to running the script in a fresh R session.
rm(list=ls())

library(tidyverse); library(dplyr);library("lubridate");  library(tidytree); library(readxl)
library(treeio); library(artyfarty);library(wesanderson); library(janitor); library(scales);
library(lubridate); library(ggtree);library(patchwork); 
## Basic set-up ----
# Session working directory. The file paths visible in this script are all
# home-anchored (`~/...`), so none of them is resolved relative to it.
setwd("~/Dropbox/COVID-19/SECONDWAVE/global/")

# Fill palettes. `color_coast` is the one the tree panels below index into;
# the other palettes are not referenced in the part of the script visible here.
color_coast <- c(
  "#FF0000", "#000000", "#00FFFF", "#FFA500", "#0000FF", "#FF00FF"
)
color_A <- c("#FF0000", "#00FFFF", "#FFA500")
color_549 <- c("#FF0000", "#00FF00", "#00FFFF", "#FFA500", "#FF00FF")
color_530 <- c("#FF0000", "#00FF00", "#00FFFF", "#FFA500", "#FF00FF")
color_N.8 <- c("#FF0000", "#00FF00", "#FF00FF")
color_596.1 <- c(
  "#FF0000", "#00FF00", "#00FFFF", "#FFA500", "#0000FF", "#FF00FF"
)
color_sub_1 <- c(
  "#FF0000", "#00FF00", "#00FFFF", "#FFA500", "#0000FF", "#FF00FF"
)
color_B.1.1 <- c(
  "#FF0000", "#00FF00", "#00FFFF", "#FFA500", "#0000FF", "#FF00FF"
)

# General-purpose 24-colour and 19-colour palettes.
plot_color <- c(
  "#FF0000", "#FFFF00", "#00EAFF", "#AA00FF", "#FF7F00", "#BFFF00",
  "#0095FF", "#FF00AA", "#FFD400", "#6AFF00", "#0040FF", "#EDB9B9",
  "#B9D7ED", "#E7E9B9", "#DCB9ED", "#B9EDE0", "#8F2323", "#23628F",
  "#8F6A23", "#6B238F", "#4F8F23", "#000000", "#737373", "#CCCCCC"
)
plot_color2 <- c(
  "#000000", "#C0C0C0", "#696969", "#FF0000", "#F2D2BD", "#800000",
  "#C9A9A6", "#00FF00", "#008000", "#00FFFF", "#BA6B57", "#FFA500",
  "#9933FF", "#A4C639", "#0000FF", "#CCCCFF", "#FF00FF", "#55ACEE",
  "#b1bca0"
)

# Names of the coastal counties.
coast_counties <- c(
  "Mombasa", "Kilifi", "Kwale", "Taita Taveta", "Tana River", "Lamu"
)

# Per-sequence metadata that is joined onto every tree below via `%<+%`.
# The panels use its `group`, `county` and `county_abbr` columns.
metadata_df <- read.csv(
  "~/Dropbox/COVID-19/SECONDWAVE/revision/Final/Figures/Figure 8/Source_data_2_global_metadata_26Feb2021.csv"
)

################ Lineage A, mutational tree (panel A) ################
# Read the lineage A Newick tree and draw the bare tree with an x axis
# (theme_tree2), reserving vertical room up to y = 270.
tree_A_m <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/A/new_tree.nwk")
p_A_m <- ggtree(tree_A_m, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 270) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_A_m

# Attach the metadata to the tree and overlay the tip points:
#   * tips with group == "coastal": size-2 circles filled by `county_abbr`
#   * tips with group == "non_Coastal_Kenya": size-1 black circles with a
#     white outline
# NOTE(review): this panel fills by `county_abbr`, whereas the other panels
# fill by `county` — confirm that both columns give the same colour order.
lineage_A_m <- p_A_m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county_abbr),
    size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    fill = "black", color = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  # The palette without its second entry.
  # NOTE(review): presumably one county is absent from this lineage — confirm.
  scale_fill_manual(values = color_coast[-2]) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage A", tag = "A"
  ) +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 270),
    minor_breaks = seq(0, 270, 50),
    breaks = seq(0, 270, 100)
  ) +
  # `limits` must be a length-2 vector (min, max). The previous version passed
  # a third value (0.0005, the break step), which is not a valid limit.
  scale_x_continuous(
    limits = c(0, 0.0011),
    breaks = seq(0, 0.0011, 0.0005),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(nrow = 1, title = "County", title.position = "left"),
    size = TRUE
  )

# Write the stand-alone panel to PDF, then show it on the active device.
# NOTE(review): the file is named Fig.7B.pdf although the panel tag is "A".
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/A/Fig.7B.pdf", width = 2.374, height = 3.51)
print(lineage_A_m)
dev.off()
lineage_A_m


################ Lineage B, mutational tree (panel B) ################
# Read the lineage B Newick tree and draw the bare tree with an x axis,
# reserving vertical room up to y = 310.
tree_B_m <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B/new_tree.nwk")
p_B_m <- ggtree(tree_B_m, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 310) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_B_m

# Attach the metadata to the tree and overlay the tip points:
#   * tips with group == "coastal": size-2 circles filled by `county`,
#     black outline
#   * tips with group == "non_Coastal_Kenya": size-1 black circles with a
#     white outline
lineage_B_m <- p_B_m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    fill = "black", color = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B", tag = "B"
  ) +
  theme_scientific() +
  scale_fill_manual(values = color_coast) +
  scale_y_continuous(
    limits = c(0, 310),
    minor_breaks = seq(0, 310, 50),
    breaks = seq(0, 310, 100)
  ) +
  # `limits` must be a length-2 vector (min, max). The previous version passed
  # a third value (0.0005, the break step), which is not a valid limit.
  scale_x_continuous(
    limits = c(0, 0.0011),
    breaks = seq(0, 0.0011, 0.0005),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  # County is mapped to `fill` (not `shape`), so the legend spec goes on the
  # fill guide, as in the other panels.
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Write the stand-alone panel to PDF, then show it on the active device.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B/Fig.7B.pdf", width = 2.374, height = 3.51)
print(lineage_B_m)
dev.off()
lineage_B_m
################ Lineage B.1, mutational tree (panel C) ################
# Load the lineage B.1 Newick tree.
tre_B.1 <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1/new_tree.nwk")

# Bare tree with an x axis; expand_limits() asks for room up to y = 6000.
p_B.1m <- ggtree(tre_B.1, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 6000) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_B.1m

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
# This is the only panel that keeps its legend (placed on the right).
lineage_B.1m <- p_B.1m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    color = "black", fill = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  scale_fill_manual(values = color_coast) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1", tag = "C"
  ) +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 5900),
    minor_breaks = seq(0, 5900, 1000),
    breaks = seq(0, 5900, 2000)
  ) +
  scale_x_continuous(
    limits = c(0, 0.0021),
    breaks = seq(0, 0.0021, 0.001),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.position = "right",
    legend.key.size = unit(0.20, "cm"),
    legend.spacing.x = unit(0.20, "cm"),
    legend.spacing.y = unit(0.20, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
# NOTE(review): the file is named Fig.7B.pdf although the panel tag is "C".
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B.1/Fig.7B.pdf", width = 2.374, height = 3.51)
print(lineage_B.1m)
dev.off()
lineage_B.1m

################ Lineage B.1.1, mutational tree (panel D) ################
# Load the lineage B.1.1 Newick tree and draw the bare tree with an x axis,
# reserving vertical room up to y = 3500.
tree_B.1.1_m <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1.1/new_tree.nwk")
p_B.1.1_m <- ggtree(tree_B.1.1_m, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 3500) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_B.1.1_m

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
lineage_B.1.1_m <- p_B.1.1_m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    colour = "black", fill = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  scale_fill_manual(values = color_coast) +
  # Breaks are generated only up to 0.0012, i.e. at 0 and 0.001, while the
  # axis itself runs to 0.0016.
  scale_x_continuous(
    limits = c(0, 0.0016),
    breaks = seq(0, 0.0012, 0.001),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1.1", tag = "D"
  ) +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 3500),
    minor_breaks = seq(0, 3500, 500),
    breaks = seq(0, 3500, 1000)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
# NOTE(review): this is the only panel written under Figures/Fig.9/ rather
# than Figures/Fig.7/ — confirm the intended output directory.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.9/B.1.1/Fig.7D.pdf", width = 2.374, height = 3.51)
print(lineage_B.1.1_m)
dev.off()
lineage_B.1.1_m




################ Lineage B.1.351, mutational tree (panel E) ################
# Load the lineage B.1.351 Newick tree and draw the bare tree with an x axis;
# expand_limits() asks for room up to y = 6000.
tree_B.1.351m <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1.351/new_tree.nwk")
p_B.1.351m <- ggtree(tree_B.1.351m, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 6000) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_B.1.351m

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
# Unlike the other panels, the x scale sets no limits or breaks — only the
# label formatter that turns off scientific notation.
lineage_B.1.351m <- p_B.1.351m %<+% metadata_df +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1.351", tag = "E"
  ) +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    color = "black", fill = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  scale_fill_manual(values = color_coast) +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 5900),
    minor_breaks = seq(0, 5900, 500),
    breaks = seq(0, 5900, 2000)
  ) +
  scale_x_continuous(labels = function(x) format(x, scientific = FALSE)) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B.1.351/Fig.7E.pdf", width = 2.374, height = 3.51)
print(lineage_B.1.351m)
dev.off()
lineage_B.1.351m


################ Lineage B.1.530, mutational tree (panel F) ################
# Load the lineage B.1.530 Newick tree and draw the bare tree with an x axis,
# reserving vertical room up to y = 80.
tre_530 <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1.530/B.1.530_phylo_dta_14Dec2021.aligned.nwk")
p_530m <- ggtree(tre_530, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 80) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_530m

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
lineage_530m <- p_530m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    color = "black", fill = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  # The palette without its fifth entry.
  # NOTE(review): presumably one county is absent from this lineage — confirm.
  scale_fill_manual(values = color_coast[-5]) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1.530", tag = "F"
  ) +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 80),
    minor_breaks = seq(0, 80, 10),
    breaks = seq(0, 80, 20)
  ) +
  scale_x_continuous(
    limits = c(0, 0.0007),
    breaks = seq(0, 0.0007, 0.0003),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B.1.530/Fig.7F.pdf", width = 2.374, height = 3.51)
print(lineage_530m)
dev.off()
lineage_530m

################ Lineage B.1.549, mutational tree (panel G) ################
# Load the lineage B.1.549 Newick tree.
tre_549 <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1.549/new_tree.nwk")

# Bare tree with an x axis, reserving vertical room up to y = 160.
m_549 <- ggtree(tre_549, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 160) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
m_549

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
lineage_549m <- m_549 %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    fill = "white", color = "black", size = 1, stroke = 0.2, shape = 21
  ) +
  # The palette without its fifth entry.
  # NOTE(review): presumably one county is absent from this lineage — confirm.
  scale_fill_manual(values = color_coast[-5]) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1.549", tag = "G"
  ) +
  theme_scientific() +
  scale_x_continuous(
    limits = c(0, 0.0011),
    breaks = seq(0, 0.001, 0.0005),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  scale_y_continuous(
    limits = c(0, 160),
    minor_breaks = seq(0, 160, 20),
    breaks = seq(0, 160, 40)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B.1.549/Fig.7G.pdf", width = 2.374, height = 3.51)
print(lineage_549m)
dev.off()
lineage_549m

################ Lineage N.8, mutational tree (panel H) ################
# Load the lineage N.8 Newick tree.
tre_N.8 <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/N.8/new_tree.nwk")

# Bare tree with an x axis, reserving vertical room up to y = 180.
p_N.8m <- ggtree(tre_N.8, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 180) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_N.8m

# Annotated tree: coastal tips are size-2 circles filled by county (default
# outline colour); tips with group "non_Coastal_Kenya" are size-1 white
# circles with a black outline.
lineage_N.8m <- p_N.8m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    fill = "white", color = "black", size = 1, stroke = 0.2, shape = 21
  ) +
  # Only palette entries 1, 2 and 6 are supplied.
  # NOTE(review): presumably three counties occur in this lineage — confirm.
  scale_fill_manual(values = color_coast[c(1, 2, 6)]) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage N.8", tag = "H"
  ) +
  theme_scientific() +
  scale_x_continuous(
    limits = c(0, 0.0014),
    breaks = seq(0, 0.001, 0.0005),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  scale_y_continuous(
    limits = c(0, 180),
    minor_breaks = seq(0, 180, 25),
    breaks = seq(0, 180, 50)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
# NOTE(review): the file is named Fig.7a.pdf although the panel tag is "H".
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/N.8/Fig.7a.pdf", width = 2.374, height = 3.51)
print(lineage_N.8m)
dev.off()
lineage_N.8m


################ Lineage B.1.596.1, mutational tree (panel I) ################
# Load the lineage B.1.596.1 Newick tree.
tre_596.1 <- read.tree("~/Dropbox/COVID-19/SECONDWAVE/global/lineages/B.1.596.1/B.1.596.1_phylo_dta_14Dec2021.aligned.nwk")

# Bare tree with an x axis, reserving vertical room up to y = 50.
p_596.1m <- ggtree(tre_596.1, color = "grey40", size = 0.2) +
  theme_tree2() +
  expand_limits(y = 50) +
  theme(axis.text.x = element_text(size = 11, angle = 0))
p_596.1m

# Annotated tree: coastal tips are size-2 circles filled by county; tips with
# group "non_Coastal_Kenya" are size-1 white circles with a black outline.
lineage_596.1m <- p_596.1m %<+% metadata_df +
  geom_tippoint(
    aes(subset = (group == "coastal"), fill = county),
    colour = "black", size = 2, stroke = 0.2, shape = 21
  ) +
  geom_tippoint(
    aes(subset = (group == "non_Coastal_Kenya")),
    color = "black", fill = "white", size = 1, stroke = 0.2, shape = 21
  ) +
  scale_fill_manual(values = color_coast) +
  labs(
    y = "Number of sequences", x = "Genetic distance",
    title = "Lineage B.1.596.1", tag = "I"
  ) +
  theme_scientific() +
  scale_x_continuous(
    limits = c(0, 0.0007),
    breaks = seq(0, 0.0007, 0.0003),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  scale_y_continuous(
    limits = c(0, 50),
    minor_breaks = seq(0, 50, 10),
    breaks = seq(0, 50, 20)
  ) +
  theme(
    plot.title = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # The legend is suppressed for this panel.
    legend.position = "none",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = TRUE
  )

# Save the single panel, then display it.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/B.1.596.1/Fig.7I.pdf", width = 2.374, height = 3.51)
print(lineage_596.1m)
dev.off()
lineage_596.1m

# Combine the nine lineage panels (tags A-I) into a single patchwork figure
# with legends collected. Only the B.1 panel sets a visible legend position;
# all the others use legend.position = "none".
Lineage_mutation <- lineage_A_m +
  lineage_B_m +
  lineage_B.1m +
  lineage_B.1.1_m +
  lineage_B.1.351m +
  lineage_530m +
  lineage_549m +
  lineage_N.8m +
  lineage_596.1m +
  plot_layout(guides = "collect")

# Write the assembled figure to an 8.5 x 8.5 inch PDF, then display it.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.7/Figure 7.pdf", width = 8.5, height = 8.5)
print(Lineage_mutation)
dev.off()
Lineage_mutation



